From 4b5ff80474146326fbb0ae2bcacd7bcd1395bfe7 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk" Date: Wed, 8 Oct 2003 12:56:46 +0000 Subject: [PATCH] bitkeeper revision 1.493 (3f84098eCbgC6OdGchmKMJlm84nGJw) memory.c, hypervisor.h, hypervisor-if.h, domain.c, entry.S: Faster page fault handling --- fast path in Xen if mapping to be updated is part of current address space. --- xen/arch/i386/entry.S | 1 + xen/common/domain.c | 2 + xen/common/memory.c | 119 +++++++++++++----- xen/include/hypervisor-ifs/hypervisor-if.h | 5 + .../include/asm-xeno/hypervisor.h | 12 ++ xenolinux-2.4.22-sparse/mm/memory.c | 44 ++++++- 6 files changed, 149 insertions(+), 34 deletions(-) diff --git a/xen/arch/i386/entry.S b/xen/arch/i386/entry.S index 3dcbff11c6..2fe19dfb68 100644 --- a/xen/arch/i386/entry.S +++ b/xen/arch/i386/entry.S @@ -647,6 +647,7 @@ ENTRY(hypervisor_call_table) .long SYMBOL_NAME(do_dom_mem_op) .long SYMBOL_NAME(do_multicall) .long SYMBOL_NAME(do_kbd_op) + .long SYMBOL_NAME(do_update_va_mapping) .rept NR_syscalls-((.-hypervisor_call_table)/4) .long SYMBOL_NAME(sys_ni_syscall) .endr diff --git a/xen/common/domain.c b/xen/common/domain.c index 0cd37ec261..29e9cd02f5 100644 --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -163,6 +163,8 @@ void __kill_domain(struct task_struct *p) void kill_domain(void) { + /* May have been in middle of a p.t. update with WP bit cleared. */ + write_cr0(read_cr0()|X86_CR0_WP); __kill_domain(current); } diff --git a/xen/common/memory.c b/xen/common/memory.c index 78c06ce79c..af53536a07 100644 --- a/xen/common/memory.c +++ b/xen/common/memory.c @@ -550,9 +550,7 @@ static int mod_l2_entry(l2_pgentry_t *p_l2_entry, l2_pgentry_t new_l2_entry) l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 ) { if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) - { put_l1_table(l2_pgentry_to_pagenr(old_l2_entry)); - } /* Assume we're mapping an L1 table, falling back to twisted L2. 
*/ if ( unlikely(get_l1_table(l2_pgentry_to_pagenr(new_l2_entry))) ) @@ -601,15 +599,12 @@ static int mod_l1_entry(l1_pgentry_t *p_l1_entry, l1_pgentry_t new_l1_entry) l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 ) { if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) - { put_page(l1_pgentry_to_pagenr(old_l1_entry), l1_pgentry_val(old_l1_entry) & _PAGE_RW); - } - + if ( get_page(l1_pgentry_to_pagenr(new_l1_entry), - l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){ + l1_pgentry_val(new_l1_entry) & _PAGE_RW) ) goto fail; - } } } else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) @@ -753,17 +748,12 @@ int do_process_page_updates(page_update_request_t *ureqs, int count) struct pfn_info *page; int err = 0, i; unsigned int cmd; - unsigned long cr0 = read_cr0(); - - /* Clear the WP bit so that we can write even read-only page mappings. */ - write_cr0(cr0 & ~X86_CR0_WP); + unsigned long cr0 = 0; for ( i = 0; i < count; i++ ) { if ( copy_from_user(&req, ureqs, sizeof(req)) ) - { kill_domain_with_errmsg("Cannot read page update request"); - } cmd = req.ptr & (sizeof(l1_pgentry_t)-1); pfn = req.ptr >> PAGE_SHIFT; @@ -773,26 +763,23 @@ int do_process_page_updates(page_update_request_t *ureqs, int count) spin_lock_irq(&current->page_lock); /* Get the page-frame number that a non-extended command references. */ - if ( likely(cmd != PGREQ_EXTENDED_COMMAND) ) + if ( (cmd == PGREQ_NORMAL_UPDATE) || (cmd == PGREQ_UNCHECKED_UPDATE) ) { - if ( likely(cmd != PGREQ_MPT_UPDATE) ) + if ( cr0 == 0 ) { - /* Need to use 'get_user' since the VA's PGD may be absent. */ - __get_user(l1e, (unsigned long *)(linear_pg_table+pfn)); - /* Now check that the VA's PTE isn't absent. */ - if ( !(l1e & _PAGE_PRESENT) ) - { - MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e); - goto unlock; - } - /* Finally, get the underlying machine address. 
*/ - pfn = l1e >> PAGE_SHIFT; + cr0 = read_cr0(); + write_cr0(cr0 & ~X86_CR0_WP); } - else if ( pfn >= max_page ) + /* Need to use 'get_user' since the VA's PGD may be absent. */ + __get_user(l1e, (unsigned long *)(linear_pg_table+pfn)); + /* Now check that the VA's PTE isn't absent. */ + if ( !(l1e & _PAGE_PRESENT) ) { - MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page); + MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e); goto unlock; } + /* Finally, get the underlying machine address. */ + pfn = l1e >> PAGE_SHIFT; } /* Least significant bits of 'ptr' demux the operation type. */ @@ -850,7 +837,11 @@ int do_process_page_updates(page_update_request_t *ureqs, int count) case PGREQ_MPT_UPDATE: page = frame_table + pfn; - if ( DOMAIN_OKAY(page->flags) ) + if ( pfn >= max_page ) + { + MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page); + } + else if ( DOMAIN_OKAY(page->flags) ) { machine_to_phys_mapping[pfn] = req.val; err = 0; @@ -892,9 +883,77 @@ int do_process_page_updates(page_update_request_t *ureqs, int count) } - /* Restore the WP bit before returning to guest. */ - write_cr0(cr0); + if ( cr0 != 0 ) + write_cr0(cr0); return 0; } + +/* + * Note: This function is structured this way so that the common path is very + * fast. Tests that are unlikely to be TRUE branch to out-of-line code. + * Unfortunately GCC's 'unlikely()' macro doesn't do the right thing :-( + */ +int do_update_va_mapping(unsigned long page_nr, + unsigned long val, + unsigned long flags) +{ + unsigned long _x, cr0 = 0; + struct task_struct *p = current; + int err = -EINVAL; + + if ( page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT) ) + goto out; + + spin_lock_irq(&p->page_lock); + + /* Check that the VA's page-directory entry is present.. */ + if ( (err = __get_user(_x, (unsigned long *) + (&linear_pg_table[page_nr]))) != 0 ) + goto unlock_and_out; + + /* If the VA's page-directory entry is read-only, we frob the WP bit. 
*/ + if ( __put_user(_x, (unsigned long *)(&linear_pg_table[page_nr])) ) + goto clear_wp; return_from_clear_wp: + + if ( (err = mod_l1_entry(&linear_pg_table[page_nr], + mk_l1_pgentry(val))) != 0 ) + goto bad; + + if ( (flags & UVMF_INVLPG) ) + goto invlpg; return_from_invlpg: + + if ( (flags & UVMF_FLUSH_TLB) ) + goto flush; return_from_flush: + + if ( cr0 != 0 ) + goto write_cr0; return_from_write_cr0: + + unlock_and_out: + spin_unlock_irq(&p->page_lock); + out: + return err; + + clear_wp: + cr0 = read_cr0(); + write_cr0(cr0 & ~X86_CR0_WP); + goto return_from_clear_wp; + + bad: + spin_unlock_irq(&p->page_lock); + kill_domain_with_errmsg("Illegal VA-mapping update request"); + return 0; + + invlpg: + flush_tlb[p->processor] = 1; + goto return_from_invlpg; + + flush: + __write_cr3_counted(pagetable_val(p->mm.pagetable)); + goto return_from_flush; + + write_cr0: + write_cr0(cr0); + goto return_from_write_cr0; +} diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index e49e3a9265..4405191a6d 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -60,6 +60,7 @@ #define __HYPERVISOR_dom_mem_op 17 #define __HYPERVISOR_multicall 18 #define __HYPERVISOR_kbd_op 19 +#define __HYPERVISOR_update_va_mapping 20 /* And the trap vector is... */ #define TRAP_INSTR "int $0x82" @@ -142,6 +143,10 @@ #define PGEXT_CMD_MASK 255 #define PGEXT_CMD_SHIFT 8 +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +#define UVMF_FLUSH_TLB 1 /* Flush entire TLB. */ +#define UVMF_INVLPG 2 /* Flush the VA mapping being updated. */ + /* * Master "switch" for enabling/disabling event delivery. 
*/ diff --git a/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h b/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h index 763ff81bfe..b0708dc6c3 100644 --- a/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h +++ b/xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h @@ -369,4 +369,16 @@ static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val) return ret; } +static inline int HYPERVISOR_update_va_mapping( + unsigned long page_nr, pte_t new_val, unsigned long flags) +{ + int ret; + __asm__ __volatile__ ( + TRAP_INSTR + : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), + "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) ); + + return ret; +} + #endif /* __HYPERVISOR_H__ */ diff --git a/xenolinux-2.4.22-sparse/mm/memory.c b/xenolinux-2.4.22-sparse/mm/memory.c index bdefce07d1..37fb2afd84 100644 --- a/xenolinux-2.4.22-sparse/mm/memory.c +++ b/xenolinux-2.4.22-sparse/mm/memory.c @@ -918,8 +918,18 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long */ static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry) { +#ifdef CONFIG_XENO + if ( likely(vma->vm_mm == current->mm) ) { + XENO_flush_page_update_queue(); + HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG); + } else { + set_pte(page_table, entry); + flush_tlb_page(vma, address); + } +#else set_pte(page_table, entry); flush_tlb_page(vma, address); +#endif update_mmu_cache(vma, address, entry); } @@ -1183,11 +1193,20 @@ static int do_swap_page(struct mm_struct * mm, flush_page_to_ram(page); flush_icache_page(vma, page); +#ifdef CONFIG_XENO + if ( likely(vma->vm_mm == current->mm) ) { + XENO_flush_page_update_queue(); + HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0); + } else { + set_pte(page_table, pte); + XENO_flush_page_update_queue(); + } +#else set_pte(page_table, pte); +#endif /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, 
pte); - XENO_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); return ret; } @@ -1229,11 +1248,20 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, mark_page_accessed(page); } +#ifdef CONFIG_XENO + if ( likely(vma->vm_mm == current->mm) ) { + XENO_flush_page_update_queue(); + HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0); + } else { + set_pte(page_table, entry); + XENO_flush_page_update_queue(); + } +#else set_pte(page_table, entry); +#endif /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, entry); - XENO_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); return 1; /* Minor fault */ @@ -1304,7 +1332,17 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); +#ifdef CONFIG_XENO + if ( likely(vma->vm_mm == current->mm) ) { + XENO_flush_page_update_queue(); + HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0); + } else { + set_pte(page_table, entry); + XENO_flush_page_update_queue(); + } +#else set_pte(page_table, entry); +#endif } else { /* One of our sibling threads was faster, back out. */ page_cache_release(new_page); @@ -1314,7 +1352,6 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma, /* no need to invalidate: a not-present page shouldn't be cached */ update_mmu_cache(vma, address, entry); - XENO_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); return 2; /* Major fault */ } @@ -1366,7 +1403,6 @@ static inline int handle_pte_fault(struct mm_struct *mm, } entry = pte_mkyoung(entry); establish_pte(vma, address, pte, entry); - XENO_flush_page_update_queue(); spin_unlock(&mm->page_table_lock); return 1; } -- 2.30.2